Diabetes distribution
# Bar chart of the number of rows in each diabetes_012 category.
diabetes |>
  count(diabetes_012) |>
  plot_ly(
    x = ~diabetes_012,
    y = ~n,
    type = "bar",
    color = ~diabetes_012
  ) |>
  layout(
    barmode = "stack",
    title = "Diabetes Prevalence",
    # The x axis carries diabetes_012, so label it as such (was "Age Group").
    xaxis = list(title = "Diabetes Status"),
    yaxis = list(title = "Count")
  )
Sex & Age distribution
# Grouped bar chart: number of rows per age group, one bar per sex.
diabetes |>
  count(sex, age) |>
  plot_ly(
    x = ~age,
    y = ~n,
    type = "bar",
    color = ~sex,
    colors = c("tomato", "skyblue")
  ) |>
  # Native pipe here as well, matching every other pipeline in this file.
  layout(
    barmode = "group",
    title = "Sex and Age Group Distribution",
    xaxis = list(title = "Age Group"),
    yaxis = list(title = "Count")
  )
# Line chart of the share of rows with diabetes_012 == "Diabetes" within each
# age/sex combination, one line per sex.
diabetes |>
  group_by(age, sex) |>
  summarise(
    diabetes_rate = mean(diabetes_012 == "Diabetes"),
    # Return an ungrouped tibble: this silences the "grouped output by 'age'"
    # message and avoids handing plot_ly() a tibble still grouped by age
    # (plotly uses dplyr groups to split line traces, which would leave the
    # points of each sex unconnected).
    .groups = "drop"
  ) |>
  plot_ly(
    x = ~age,
    y = ~diabetes_rate,
    color = ~sex,
    colors = c("tomato", "skyblue"),
    type = "scatter",
    mode = "lines+markers"
  ) |>
  layout(
    title = "Diabetes Prevalence by Sex and Age Group",
    xaxis = list(title = "Age Group"),
    yaxis = list(title = "Diabetes Rate")
  )
## `summarise()` has grouped output by 'age'. You can
## override using the `.groups` argument.
BMI Distribution
# Box plots of bmi, one box (and one colour) per diabetes_012 category.
diabetes |>
  plot_ly(
    type = "box",
    x = ~diabetes_012,
    y = ~bmi,
    color = ~diabetes_012
  ) |>
  layout(
    xaxis = list(title = "Diabetes Status"),
    yaxis = list(title = "BMI"),
    title = "BMI Distribution by Diabetes Status"
  )
# Remove the "Prediabetes" rows, then discard factor levels that are no
# longer used so they do not show up in later counts and legends.
diabetes_nd = droplevels(
  filter(diabetes, diabetes_012 != "Prediabetes")
)
# Reshape to long format: one row per (respondent, binary variable), with the
# variable name in `variable` and the response in `value`; diabetes_012 is
# carried along as the identifying column.
binary_long_nd = pivot_longer(
  select(diabetes_nd, diabetes_012, all_of(binary_vars)),
  cols = all_of(binary_vars),
  names_to = "variable",
  values_to = "value"
)
# Share of each answer within every (variable, diabetes status) cell.
#
# The denominator is the number of non-missing responses to that variable
# inside the diabetes group, so `prop` answers "what fraction of this diabetes
# group said Yes/No?" and is comparable across groups of very different size.
# (Normalising within `variable` after filtering to one answer would instead
# give the diabetes-status mix among people with that answer.)
binary_prop_nd = binary_long_nd |>
  filter(!is.na(value)) |>
  count(variable, diabetes_012, value) |>
  group_by(variable, diabetes_012) |>
  mutate(prop = n / sum(n)) |>
  ungroup()

# proportion "Yes" in each group
# Columns: variable, diabetes_012, n (number of "Yes" answers), prop.
binary_yes_nd = binary_prop_nd |>
  filter(value == "Yes") |>
  select(-value)

# proportion "No" in each group
# Columns: variable, diabetes_012, n (number of "No" answers), prop.
binary_no_nd = binary_prop_nd |>
  filter(value == "No") |>
  select(-value)
Clinical Conditions vs Diabetes
# Clinical variables shown in this section.
clinical_vars = c("high_bp", "high_chol", "stroke", "heart_diseaseor_attack")

# Dodged bar chart of `prop` for the clinical variables, filled by diabetes
# status.
#   props  - table with columns variable, diabetes_012 and prop
#            (binary_yes_nd or binary_no_nd)
#   answer - the response that table was filtered to ("Yes" or "No"); used
#            only in the title and the y-axis label
# Returns a ggplot object.
plot_clinical_prop = function(props, answer) {
  props |>
    filter(variable %in% clinical_vars) |>
    ggplot(aes(x = variable, y = prop, fill = diabetes_012)) +
    geom_col(position = "dodge") +
    scale_y_continuous(labels = scales::percent_format()) +
    labs(
      # Same wording for both charts (the "No" chart used to say
      # "Clinical Variables").
      title = sprintf(
        "Clinical Conditions: Proportion '%s' by Diabetes Status", answer
      ),
      x = NULL,
      y = sprintf("Proportion '%s'", answer),
      fill = "Diabetes status"
    ) +
    theme_minimal() +
    # Tilt the variable names so they do not overlap.
    theme(axis.text.x = element_text(angle = 30, hjust = 1))
}

# answered yes
p_clinical = plot_clinical_prop(binary_yes_nd, "Yes")
# answered no
p_clinical_no = plot_clinical_prop(binary_no_nd, "No")

# Convert both static charts to interactive plotly widgets.
ggplotly(p_clinical)
ggplotly(p_clinical_no)
Lifestyle vs Diabetes
# Lifestyle variables shown in this section.
behavior_vars = c(
  "smoker", "phys_activity", "fruits", "veggies", "hvy_alcohol_consump"
)

# Chart built from the "Yes" table: dodged bars of `prop` per variable,
# filled by diabetes status, with the y axis formatted as percentages.
p_behavior = ggplot(
  filter(binary_yes_nd, variable %in% behavior_vars),
  aes(x = variable, y = prop, fill = diabetes_012)
) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme_minimal() +
  # Tilt the variable names so they do not overlap.
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(
    title = "Lifestyle Behaviors: Proportion 'Yes' by Diabetes Status",
    x = NULL,
    y = "Proportion 'Yes'",
    fill = "Diabetes status"
  )

# Same chart built from the "No" table.
p_behavior_no = ggplot(
  filter(binary_no_nd, variable %in% behavior_vars),
  aes(x = variable, y = prop, fill = diabetes_012)
) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(
    title = "Lifestyle Behaviors: Proportion 'No' by Diabetes Status",
    x = NULL,
    y = "Proportion 'No'",
    fill = "Diabetes status"
  )

# Convert both static charts to interactive plotly widgets.
p_behavior |> ggplotly()
p_behavior_no |> ggplotly()
Healthcare Access and Function vs Diabetes
# Healthcare-access, functional-limitation and sex variables shown in this
# section.
# NOTE(review): `sex` only produces bars if it is coded "Yes"/"No" like the
# other binary variables - confirm its coding and what "Yes" stands for.
access_vars = c("chol_check", "any_healthcare", "no_docbc_cost", "diff_walk", "sex")

# Dodged bar chart of `prop` for the access variables, filled by diabetes
# status.
#   props  - table with columns variable, diabetes_012 and prop
#            (binary_yes_nd or binary_no_nd)
#   answer - the response that table was filtered to ("Yes" or "No"); used
#            only in the title and the y-axis label
# Returns a ggplot object.
plot_access_prop = function(props, answer) {
  props |>
    filter(variable %in% access_vars) |>
    ggplot(aes(x = variable, y = prop, fill = diabetes_012)) +
    geom_col(position = "dodge") +
    scale_y_continuous(labels = scales::percent_format()) +
    labs(
      # Same wording for both charts (the "No" chart used to read
      # "Access, Function, Sex: ... by Diabetes Status").
      title = sprintf(
        "Access, Functional Limitations, and Sex: Proportion '%s'", answer
      ),
      x = NULL,
      y = sprintf("Proportion '%s'", answer),
      fill = "Diabetes status"
    ) +
    theme_minimal() +
    # Tilt the variable names so they do not overlap.
    theme(axis.text.x = element_text(angle = 30, hjust = 1))
}

# answered yes
p_access = plot_access_prop(binary_yes_nd, "Yes")
# answered no
p_access_no = plot_access_prop(binary_no_nd, "No")

# Convert both static charts to interactive plotly widgets.
ggplotly(p_access)
ggplotly(p_access_no)